[65]:
import numpy as np
import pandas as pd

import scipy.optimize
import scipy.stats as st

import iqplot

import tqdm

import bokeh.io
import bokeh.plotting
bokeh.io.output_notebook()
Loading BokehJS ...
[2]:
df = pd.read_csv('../data/singer_transcript_counts.csv', comment='#')

df.head()
[2]:
Rex1 Rest Nanog Prdm14
0 11 34 39 0
1 172 91 33 5
2 261 70 68 0
3 178 54 88 1
4 129 54 41 0
[5]:
n = df['Nanog'].values
[6]:
p = iqplot.ecdf(
    n,
    x_axis_label='nanog count'
)

bokeh.io.show(p)
[66]:
def log_like_nbinom(params, n):
    """Log likelihood for repeated measurements from NBinom."""
    alpha, b = params

    if alpha <= 0 or b <=0:
        return -np.inf

    beta = 1 / b
    return np.sum(st.nbinom.logpmf(n, alpha, beta/(1+beta)))


def neg_log_like_nbinom(params, n):
    return -log_like_nbinom(params, n)


def nbinom_mle(n):
    nbar = np.mean(n)
    s2 = np.var(n)
    beta0 = nbar / (s2 - nbar)
    alpha0 = nbar * beta0

    params0 = np.array([alpha0, 1 / beta0])

    res = scipy.optimize.minimize(
        neg_log_like_nbinom, params0, args=(n,), method='Powell'
    )

    alpha, b = res.x

    return alpha, b


def gen_nbimon(alpha, b, size):
    beta = 1 / b
    return np.random.negative_binomial(alpha, beta / (1 + beta), size=size)


def draw_parametric_bs_reps_mle(
    mle_fun, gen_fun, data, args=(), size=1, progress_bar=False
):
    params = mle_fun(data, *args)

    if progress_bar:
        iterator = tqdm.tqdm(range(size))
    else:
        iterator = range(size)

    return np.array(
        [mle_fun(gen_fun(*params, size=len(data)), *args) for _ in iterator]
    )
[73]:
bs_reps = draw_parametric_bs_reps_mle(
    nbinom_mle,
    gen_nbimon,
    n,
    size=10_000,
    progress_bar=True
)
100%|████████████████████████████████████████████████████████| 10000/10000 [00:47<00:00, 208.62it/s]
[71]:
np.percentile(bs_reps[:,0], [2.5, 97.5])
[71]:
array([1.07966995, 1.49956886])
[72]:
np.percentile(bs_reps[:,1], [2.5, 97.5])
[72]:
array([56.40912929, 84.05777092])
[74]:
import bebi103
[79]:
rg = np.random
[81]:
def gen_nbimon(params, size, rg):
    alpha, b = params
    beta = 1 / b
    return rg.negative_binomial(alpha, beta / (1 + beta), size=size)


[83]:
bs_reps = bebi103.bootstrap.draw_bs_reps_mle(
    nbinom_mle,
    gen_nbimon,
    n,
    size=10_000,
)

 12%|███████                                                  | 1246/10000 [00:20<01:01, 142.34it/s]
[84]:
bs_reps
[84]:
array([[ 1.35241872, 61.79183402],
       [ 1.37863287, 52.93115143],
       [ 1.15899676, 74.42410383],
       ...,
       [ 1.20938007, 64.19263231],
       [ 1.33370964, 70.62928845],
       [ 1.32687391, 62.26093127]])
[90]:
df_res = pd.DataFrame(data=bs_reps, columns=['α*', 'β*'])

p = bebi103.viz.corner(
    samples=df_res,
    parameters=['α*', 'β*'],
    plot_ecdf=True
    # show_contours=True,
    # levels=[0.95],
)

bokeh.io.show(p)
[38]:
p = iqplot.ecdf(
    n,
    x_axis_label='nanog count',
    conf_int=True
)

beta = 1 / b

n_theor = np.arange(0, 501)
cdf_theor = st.nbinom.cdf(n_theor, alpha, beta/(1+beta))
p.circle(n_theor, cdf_theor, color='orange')

bokeh.io.show(p)
[40]:
np.random.negative_binomial(alpha, beta/(1+beta), size=len(n))
[40]:
array([157,  78, 151,  99,  82, 108, 130, 136,  36,  78, 186,  64, 137,
        44,  18, 136,  82, 180,  47,  65,  62,  28,   9,  49,  77, 230,
        25,  54, 125,  65,  28,  71,  55, 107,  18, 199,  20,  40, 153,
         0,  16, 140,  28,  85,  84, 223, 195, 293,  33, 172, 138,  47,
        13,  18,  62,  11,  52,  10, 175,  29,   9,  36, 146,  69, 104,
        73, 144,  67, 134,  16,  89,   7,  27,  76,  32,  59,  12,  77,
       126, 197,  56,  12,  44,  37, 190,  78,  49,  73,  54,   7, 133,
        61,  96, 350,  41,  43,   4, 121,  81, 136,  62,  28, 131,  44,
       169,  62,  37, 261,  41,  95,  33,   9, 142,  64, 129,  68,   1,
        32,  95,  96,  57,  23, 199,  33,  57, 125, 104,  39, 179,   5,
        56,  18,  95,  46,  46,  77, 133,  81, 165, 162, 122,  35,  80,
        32,  72,  27,   7,  84, 104,  18,  56, 171,  28, 213,  99,  42,
        65,  97,  53,  16,   9,  17,  33,  76,  94,  57, 133,  53,  46,
       124,  24,  60, 205, 150, 107,  76,  76,  50,   4, 334,  75,  82,
        14, 100, 132,  58,  30,  19,  24,  81,  33,  38, 120,  94, 129,
        38,  73,  24, 117,  15,  66,  23,   7, 190, 140,  59, 164,  41,
       156,   9,  53,   9,  29, 105,  45,  46, 129,   8, 300,   6,  27,
        10,  34, 155,  69,   5,  28,  20, 150, 131,  56,  76, 204,  45,
       171,  60,  20, 121, 107, 213, 218,  46,  53,  13, 138,  29, 161,
       104, 350,  10, 185,  16,  86,  48,  30,  60, 159,  76, 263,  59,
       209,  85, 166,   2, 164,  22,  38,  68, 221,  64,  40,   6, 101,
       227,  73, 126,  24, 161,  27])
[ ]: